### Rachel Porter
### 12/1/2022
### Replication file for Male/Female Analysis, with substantive issue coverage

rm(list=ls())

# load libraries
library("cobalt")
library("WeightIt")
library("ebal")
library("jtools")
library("survey")
library("sensemakr")
library("stargazer")
library("MASS")
library("arm")
library("ggplot2")

# clear environment and set working directory
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")

# load data for male/female democrat analysis
master <- readRDS("male_democrats_data.rds")

# subset data into strategic and not for balancing 
strategic_pres <- subset(master, master$strategic == "Strategic" & 
                           master$Value != 3)
strategic_sub <- subset(master, master$strategic == "Strategic" & 
                          master$Value != 2)

not_pres <- subset(master, master$strategic == "Not" & master$Value != 3)
not_sub <- subset(master, master$strategic == "Not" & master$Value != 2)

############################### MALE PROFESSIONAL DEMOCRATS#################################

## estimating model, male against female who did not discuss women's issues
w.out.strat <- weightit(women_number ~ quality_cand + openrace + ss_party + 
                          primary_type + year + minority,
                          data = strategic_pres, estimand = "ATT", 
                          method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                          data = strategic_pres)
fit.strat1 <- svyglm(narrow_female ~ women_number + quality_cand + openrace + 
                          ss_party + primary_type + year + minority, 
                          design = d.w.strat, family=quasibinomial())

## estimating model, male against female who did discuss women's issues
w.out.strat2 <- weightit(women_number ~ quality_cand + openrace + ss_party + 
                          primary_type + year + minority,
                          data = strategic_sub, estimand = "ATT", 
                          method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat2$weights,
                          data = strategic_sub)
fit.strat2 <- svyglm(narrow_female ~ women_number + quality_cand + openrace + 
                          ss_party + primary_type + year + minority, 
                          design = d.w.strat, family=quasibinomial())

# Simulating predicted probabilities for fit.strat1
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat1$coef
vcv <- vcov(fit.strat1)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat1$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(0,0)
quality_cand1 <- c(1,1)
`ss_partyDem Safe` <- c(1,1)
`ss_partyRep Safe` <- c(0,0)
year2020 <- c(0,0)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, quality_cand1, 
                    openrace1, `ss_partyDem Safe`, `ss_partyRep Safe`, 
                    `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_strat <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat, prob = .05)
hi <- quantile(pe_strat, prob = .95)
fem_vector = c(mean(pe_strat), lo, hi)

# Simulating predicted probabilities for fit.strat2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat2$coef
vcv <- vcov(fit.strat2)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat2$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
`primary_typeOpen Primary` <- c(0,0)
quality_cand1 <- c(1,1)
`ss_partyDem Safe` <- c(1,1)
`ss_partyRep Safe` <- c(0,0)
year2020 <- c(0,0)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, quality_cand1, openrace1, 
                    `ss_partyDem Safe`, `ss_partyRep Safe`,
                    `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_strat2 <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_strat2, prob = .05)
hi <- quantile(pe_strat2, prob = .95)
fem_vector2 = c(mean(pe_strat2), lo, hi)

# saving simulations to create Figure 3
fem_vector <- rbind.data.frame(fem_vector, fem_vector2)
colnames(fem_vector) <- c("pe", "lo", "hi")
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(fem_vector, file = "figure_3_female.rds")

rm(list=setdiff(ls(), c("not_pres", "not_sub", "pe_strat")))

############################### MALE AMATEUR DEMOCRATS#################################

## estimating model, male against female who did not discuss women's issues
w.out.strat <- weightit(women_number ~ openrace + ss_party + primary_type + 
                        year + minority, data = not_pres, estimand = "ATT", 
                        method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat$weights,
                        data = not_pres)
fit.strat1 <- svyglm(narrow_female ~ women_number + openrace + ss_party + 
                        primary_type + year + minority, 
                        design = d.w.strat, family=quasibinomial())

## estimating model, male against female who did discuss women's issues
w.out.strat2 <- weightit(women_number ~ openrace + ss_party + primary_type + 
                        year + minority, data = not_sub, estimand = "ATT", 
                        method = "ebal")
d.w.strat <- svydesign(ids = ~1, weights = w.out.strat2$weights,
                        data = not_sub)
fit.strat2 <- svyglm(narrow_female ~ women_number + openrace + ss_party + 
                        primary_type + year + minority, 
                        design = d.w.strat, family=quasibinomial())

# Simulating predicted probabilities for fit.strat1
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat1$coef
vcv <- vcov(fit.strat1)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat1$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), 
           se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
`ss_partyDem Safe` <- c(1,1)
`ss_partyRep Safe` <- c(0,0)
`primary_typeOpen Primary` <- c(1,1)
year2020 <- c(0,0)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, openrace1, `ss_partyDem Safe`, 
            `ss_partyRep Safe`, `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not <- pp.sim[,2] - pp.sim[,1]

lo <- quantile(pe_not, prob = .05)
hi <- quantile(pe_not, prob = .95)
fem_vector = c(mean(pe_not), hi, lo)

# Simulating predicted probabilities for fit.strat2
set.seed(12484)
m <- 1000

# Simulate coefficients from a multivariate normal
betas <- fit.strat2$coef
vcv <- vcov(fit.strat2)
sim.betas <- mvrnorm(m, betas, vcv)

# Check to see if the simulated coefficients look like the real results
round(fit.strat2$coef, digits = 2)
round(head(sim.betas, 10), digits = 2)
data.frame(sim.means = apply(sim.betas, 2, mean), 
           betas = betas, sim.sd = apply(sim.betas, 2, sd), se = sqrt(diag(vcv)))   

# Set values for all other variables 
women_number <- c(0,1)
openrace1 <- c(0,0)
`ss_partyDem Safe` <- c(1,1)
`ss_partyRep Safe` <- c(0,0)
`primary_typeOpen Primary` <- c(1,1)
year2020 <- c(0,0)
minority1 <- c(0,0)

# Create hypothetical independent variable profile
x.nes <- data.frame(intercept = 1, women_number, openrace1, `ss_partyDem Safe`, 
            `ss_partyRep Safe`, `primary_typeOpen Primary`, year2020, minority1)

# Compute the predicted probabilities using the estimated coefficients
pp.betas <- invlogit(as.matrix(x.nes)%*%betas)

# Compute pp and confidence intervals using the simulated coefficients
pp.sim <- matrix(NA, nrow = m, ncol = length(women_number))

for(i in 1:m){
  pp.sim[i, ] <- invlogit(as.matrix(x.nes)%*%sim.betas[i, ]) 
}

pe_not2 <- pp.sim[,2] - pp.sim[,1]
lo <- quantile(pe_not2, prob = .05)
hi <- quantile(pe_not2, prob = .95)
fem_vector2 = c(mean(pe_not2), lo, hi)

# running ttest discussed in manuscript
t.test(pe_strat, pe_not)

# saving simulations to create Figure 4
fem_vector <- rbind.data.frame(fem_vector, fem_vector2)
colnames(fem_vector) <- c("pe", "lo", "hi")
setwd("~/Dropbox/Primary_Elections/Issue Paper/Replication_Archive/JOP_Replication_Files")
saveRDS(fem_vector, file = "figure_4_female.rds")
